{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "committed-karma",
   "metadata": {},
   "source": [
    "# Dataset analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "durable-steam",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy as sp\n",
    "import plotly.offline as py\n",
    "import plotly.graph_objects as go\n",
    "import plotly.figure_factory as ff\n",
    "import plotly.express as px\n",
    "from plotly.subplots import make_subplots\n",
    "from pathlib import Path\n",
    "from timeeval import Datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "found-member",
   "metadata": {},
   "outputs": [],
   "source": [
    "data_path = Path(\"../data\") / \"test-cases\"\n",
    "# load dataset metadata\n",
    "dmgr = Datasets(data_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fuzzy-slovakia",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_datasets(datasets, max_channels = 20):\n",
    "    if isinstance(datasets, str):\n",
    "        datasets = [datasets]\n",
    "    else:\n",
    "        datasets = datasets\n",
    "    n_datasets = len(datasets)\n",
    "    \n",
    "    # Create plot\n",
    "    fig = make_subplots(n_datasets, 1)\n",
    "    for i, d in enumerate(datasets):\n",
    "        # construct dataset ID\n",
    "        dataset_id = (\"GutenTAG\", f\"{d}.unsupervised\")\n",
    "        \n",
    "        # load dataset details\n",
    "        try:\n",
    "            df_dataset = dmgr.get_dataset_df(dataset_id)\n",
    "        except Exception as e:\n",
    "            warnings.warn(f\"Could not load dataset {d}, because {repr(e)}\")\n",
    "            continue\n",
    "\n",
    "        for j in range(1, min(df_dataset.shape[1]-1, max_channels+1)):\n",
    "            fig.add_trace(go.Scatter(\n",
    "                x=df_dataset.index,\n",
    "                y=df_dataset.iloc[:, j],\n",
    "                name=f\"{d} channel {j}\",\n",
    "            ), i+1, 1)\n",
    "\n",
    "        # mark anomaly regions\n",
    "        s = df_dataset[\"is_anomaly\"].diff()\n",
    "        anomaly_regions = zip(s[s== 1].index, s[s == -1].index)\n",
    "        for s, e in anomaly_regions:\n",
    "            fig.add_vrect(x0=s-1, x1=e,\n",
    "                          exclude_empty_subplots=True,\n",
    "                          line_width=0,\n",
    "                          fillcolor=\"red\",\n",
    "                          opacity=0.3,\n",
    "                          annotation_text=\"anomaly\",\n",
    "                          annotation_position=\"top left\",\n",
    "                          row=i+1,\n",
    "                          col=1)\n",
    "\n",
    "#     fig.update_xaxes(matches=\"x\")\n",
    "    fig.update_layout(\n",
    "        title=f\"Datasets and ground truth of {','.join(datasets)} datasets\",\n",
    "        height=200*n_datasets if n_datasets > 1 else 400\n",
    "    )\n",
    "    return py.iplot(fig)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "aboriginal-teaching",
   "metadata": {},
   "source": [
    "## Lookup all datasets and plot them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "unlike-atmosphere",
   "metadata": {},
   "outputs": [],
   "source": [
    "datasets = np.unique([d.split(\".\")[0] for d in dmgr.get_dataset_names() if not d.startswith(\"cbf\")])\n",
    "datasets.sort()\n",
    "i = 0\n",
    "len(datasets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "characteristic-baltimore",
   "metadata": {},
   "outputs": [],
   "source": [
    "if i >= len(datasets):\n",
    "    print(\"FINISHED!\")\n",
    "    i = 0\n",
    "else:\n",
    "    print(f\"Dataset {i}: {datasets[i]}\")\n",
    "    plot_datasets(datasets[i])\n",
    "    i += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "average-tuesday",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_datasets([\"sinus-type-mean\", \"ecg-noise-01%\", \"poly-type-trend\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pressed-option",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
